{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup a regression experiment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.datasets import load_boston\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "boston = load_boston()\n",
    "feature_names = list(boston.feature_names)\n",
    "X, y = pd.DataFrame(boston.data, columns=feature_names), boston.target\n",
    "\n",
    "seed = 1\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Explore the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret import show\n",
    "from interpret.data import Marginal\n",
    "\n",
    "marginal = Marginal().explain_data(X_train, y_train, name = 'Train Data')\n",
    "show(marginal)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train the Explainable Boosting Machine (EBM)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret.glassbox import ExplainableBoostingRegressor, LinearRegression, RegressionTree\n",
    "\n",
    "ebm = ExplainableBoostingRegressor(random_state=seed)\n",
    "ebm.fit(X_train, y_train)   #Works on dataframes and numpy arrays"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Global Explanations: What the model learned overall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ebm_global = ebm.explain_global(name='EBM')\n",
    "show(ebm_global)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Local Explanations: How an individual prediction was made"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ebm_local = ebm.explain_local(X_test[:5], y_test[:5], name='EBM')\n",
    "show(ebm_local)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Evaluate EBM performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "from interpret import show\n",
    "from interpret.perf import RegressionPerf\n",
    "\n",
    "ebm_perf = RegressionPerf(ebm.predict).explain_perf(X_test, y_test, name='EBM')\n",
    "show(ebm_perf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Let's test out a few other Explainable Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret.glassbox import LinearRegression, RegressionTree\n",
    "\n",
    "lr = LinearRegression(random_state=seed)\n",
    "lr.fit(X_train, y_train)\n",
    "\n",
    "rt = RegressionTree(random_state=seed)\n",
    "rt.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compare performance using the Dashboard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "lr_perf = RegressionPerf(lr.predict).explain_perf(X_test, y_test, name='Linear Regression')\n",
    "rt_perf = RegressionPerf(rt.predict).explain_perf(X_test, y_test, name='Regression Tree')\n",
    "\n",
    "show(lr_perf)\n",
    "show(rt_perf)\n",
    "show(ebm_perf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Glassbox: All of our models have global and local explanations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "lr_global = lr.explain_global(name='Linear Regression')\n",
    "rt_global = rt.explain_global(name='Regression Tree')\n",
    "\n",
    "show(lr_global)\n",
    "show(rt_global)\n",
    "show(ebm_global)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dashboard: look at everything at once"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Do everything in one shot with the InterpretML Dashboard by passing a list into show\n",
    "\n",
    "show([marginal, lr_global, lr_perf, rt_global, rt_perf, ebm_global, ebm_perf])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
